1 Load packages

library(tidyverse)
library(pheatmap)
library(splots)
library(patchwork)
library(synergyfinder)
library(ggsignif)
library(broom)
library(ggrepel)

2 Load data

## read ctg data files
ctg_data <- lapply(list.files("~/tas/data/ctg_data/", full.names = T, 
    pattern = "TAS"), function(f) read_tsv(f, col_names = F, 
    col_types = "cci") %>% `colnames<-`(c("screen", "well", "pcount"))) %>% 
    bind_rows() %>% separate(screen, c("date", "operator", "mithras", 
    "experiment_id"), sep = "_", remove = FALSE) %>% separate(experiment_id, 
    c("experiment", "line", "TAS_concentration")) %>% # join compound annotation data
full_join(., lapply(list.files("~/tas/data/annotations/", full.names = T), 
    function(f) read_csv(f)) %>% bind_rows() %>% rename(well = destination.well) %>% 
    select(drug, well, Source_Position, Source) %>% separate(well, 
    c("row", "col"), sep = 1, remove = FALSE) %>% mutate(drug = if_else(is.na(drug), 
    Source, drug), drug = if_else(well == "J19", "DMSO", drug), 
    combination = if_else(col == "13", FALSE, TRUE))) %>% unite(id, 
    c("date", "line", "TAS_concentration", "experiment"), remove = FALSE) %>% 
    mutate(row_num = match(row, LETTERS[1:26]), col_num = as.numeric(col)) %>% 
    # annotate concentrations
mutate(concentration = ifelse(Source_Position == "Rack", NA, 
    ifelse(Source_Position == "Master_Library_Plate", 1, ifelse(Source_Position == 
        "Dilution_Plate_1", 2, ifelse(Source_Position == "Dilution_Plate_2", 
        3, ifelse(Source_Position == "Dilution_Plate_3", 4, ifelse(Source_Position == 
            "Dilution_Plate_4", 5, "no match")))))))

3 Inspect and clean screening data

On the second screening day some plates were read-out twice. They are labeled TAS and TASa. I check if there are profound differences between these plates. First I correlate the pcount of these plates.

ctg_data %>% filter(date == "171212" & line == "19") %>% select(pcount, 
    well, id) %>% spread(id, pcount) %>% select(-well) %>% as.data.frame() %>% 
    cor() %>% pheatmap()

The plates are virtually the same. I also checked differences between zfactors, which are not apparent. I keep the plates with the experiment name “TAS” for simplicity.

ctg_data <- ctg_data %>% filter(experiment != "TASa")

Now I want to gain a broad overview of the asssay plates.

ctg_data %>% select(id, pcount) %>% split(., .$id) %>% lapply(., 
    function(f) f$pcount %>% as.vector) %>% plotScreen(., 6)

Now I focus on column and row-wise effects. Row wise effects dominate clearly

ctg_data %>% ggplot(aes(row, pcount)) + geom_point() + geom_smooth() + 
    facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of rows") + 
    # next figure
ctg_data %>% ggplot(aes(col, pcount)) + geom_point() + geom_smooth() + 
    facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of columns")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

I see some spatial effects on most screening plates. The effect is mostly restricted to row-wise differences. I apply a conservative loss smoothing to correct for these.

## split data by plate and apply loess normalization
ctg_loess <- ctg_data %>% dplyr::select(row_num, col_num, pcount, 
    id) %>% # na values
drop_na() %>% # set the ctrl column 13 to NA mutate(pcount = ifelse(col_num
# == 13, NA, pcount)) %>%
split(.$id) %>% lapply(function(s) {
    ## loess fit. family is 'symmetric' to be robust to outliers
    fit <- loess(pcount ~ row_num + col_num, data = s, family = "symmetric")
    ## apply normalization
    tibble(norm_fac = fit$fitted) %>% cbind(s %>% drop_na(), 
        .) %>% mutate(pcount_norm = pcount - (norm_fac - median(norm_fac)))
}) %>% bind_rows() %>% full_join(ctg_data)

Now I plot the results.

ctg_loess %>% ggplot(aes(row, pcount_norm)) + geom_point() + 
    geom_smooth() + facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of rows") + 
    # next figure
ctg_loess %>% ggplot(aes(col, pcount_norm)) + geom_point() + 
    geom_smooth() + facet_wrap(~id) + theme_bw() + ggtitle("Spatial effects of columns")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

For demo purposes I show one plate before and after correction:

ctg_data %>% filter(TAS_concentration == "1") %>% ggplot(aes(row, 
    pcount)) + geom_point() + # geom_smooth() +
facet_wrap(~line) + theme_bw() + ggtitle("Before Normalization: Spatial effects of rows") + 
    # and after normalization
ctg_loess %>% filter(TAS_concentration == "1") %>% ggplot(aes(row, 
    pcount_norm)) + geom_point() + geom_smooth() + facet_wrap(~line) + 
    theme_bw() + ggtitle("After Normalization:Spatial effects of rows")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

4 QC

Now I look at the distribution of controls in the first screening replicate.

coi <- c("Stauro_500nM", "DMSO")
ctg_loess %>% mutate(line_date_exp = paste0(line, "_", date, 
    "_", experiment)) %>% filter(drug %in% coi) %>% ggplot(aes(drug, 
    pcount_norm, color = combination)) + geom_jitter(alpha = 0.7) + 
    facet_grid(line_date_exp ~ TAS_concentration) + theme_bw()

The distribution seems to be quiet good. I now go on calculating the z-factors for each plate for combined and uncombined wells.

ctg_loess %>% filter(drug %in% coi) %>% group_by(id, drug, combination, 
    TAS_concentration, line, date, experiment) %>% summarise(sd = sd(pcount_norm, 
    na.rm = TRUE), mean = mean(pcount_norm, na.rm = TRUE)) %>% 
    group_by(id, combination, TAS_concentration, line, date, 
        experiment) %>% summarise(zfactor = 1 - ((3 * sum(sd))/abs(range(mean)[1] - 
    range(mean)[2]))) %>% mutate(qc = if_else(zfactor < 0.25, 
    FALSE, TRUE)) %>% ggplot(aes(combination, zfactor, color = qc, 
    shape = date)) + geom_point(alpha = 0.7, size = 5) + facet_grid(line ~ 
    TAS_concentration) + theme_bw() + geom_hline(yintercept = 0.25)

Also the z-factors are okay for most plates. Three plates have slight losses in assay quality, however, this seems still acceptable.

5 Normalize screening data

Again, I take a look at the TAS-untreated ctrls. They should be the same across plates

coi <- c("DMSO", "Stauro_500nM")
ctg_loess %>% filter(drug %in% coi & combination == FALSE) %>% 
    ggplot(aes(TAS_concentration, pcount_norm)) + geom_boxplot(aes(colour = drug), 
    width = 6) + theme_bw() + facet_grid(line ~ date) + # scale_y_continuous(limits = c(-1,250000)) +
ggtitle("Before DMSO normalization") + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1))

There are some line specific differences in viability and one outlier plate 171128_27_4. I now normalize the plates to the untreated DMSO control.

ctg_ln <- ctg_loess %>% full_join(., ctg_loess %>% filter(drug == 
    "DMSO" & combination == FALSE) %>% group_by(id) %>% summarise(ctrl = median(pcount_norm, 
    na.rm = TRUE)) %>% ungroup()) %>% group_by(id) %>% mutate(rv = pcount_norm/ctrl) %>% 
    ungroup()
## Joining, by = "id"

Now I look at the controls after normalization.

coi <- c("DMSO", "Stauro_500nM")
ctg_ln %>% filter(drug %in% coi & combination == FALSE) %>% ggplot(aes(TAS_concentration, 
    rv)) + geom_boxplot(aes(colour = drug), width = 6) + theme_bw() + 
    facet_grid(line ~ date) + # scale_y_continuous(limits = c(-1,250000)) +
ggtitle("After DMSO normalization") + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1))

The normalization worked well. As intended there are controlled line and TAS dependent effects left.

ctg_ln %>% ggplot(aes(TAS_concentration, rv)) + geom_boxplot(width = 6) + 
    theme_bw() + facet_grid(line ~ date) + # scale_y_continuous(limits = c(-1,250000)) +
ggtitle("Relative viability measurments for every plate") + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1))

I wonder if I have to center the dataset further inbetween 1 and 0. Therefore I plot a histogram of all viabilities.

ctg_ln %>% ggplot(aes(rv)) + geom_histogram(bins = 90) + theme_classic() + 
    geom_vline(xintercept = c(0, 1)) + xlab("relative viability")

As an alternative approach I trim the data between 1 and 0, assuming that these datapoints are only spread and normalization artifacts.

ctg_lnt <- ctg_ln %>% mutate(rv_trim = ifelse(rv < 0, 0, rv), 
    rv_trim = ifelse(rv > 1, 1, rv_trim))
ctg_lnt %>% ggplot(aes(rv_trim)) + geom_histogram(bins = 90) + 
    theme_classic() + geom_vline(xintercept = c(0, 1)) + xlab("relative viability")

I am fine with this distribution. Let’s move on. Before we continue I take a look at the overall correlation.

coi <- c("DMSO", "Stauro_500nM")
tmp <- ctg_ln %>% unite("line_well", c("line", "well", "TAS_concentration")) %>% 
    ungroup() %>% mutate(Replicate = ifelse(date == "171128", 
    1, 2)) %>% dplyr::select(line_well, Replicate, rv, drug) %>% 
    spread(Replicate, rv) %>% mutate(ctrl = if_else(drug %in% 
    coi, TRUE, FALSE))
tmp.cor <- tmp %>% dplyr::select(`1`, `2`) %>% cor(., use = "pairwise.complete.obs") %>% 
    .[1, 2] %>% round(2)
tmp %>% ggplot(aes(x = `1`, y = `2`)) + geom_point(alpha = 0.3) + 
    theme_classic() + ylab("Replicate 2") + xlab("Replicate 1") + 
    ggtitle(paste0("correlation of normalized viabiliy, r =", 
        tmp.cor)) + geom_abline(slope = 1) + # geom_density2d(alpha = 0.7, color = 'black') +
scale_x_continuous(breaks = c(0, 0.5, 1)) + scale_y_continuous(breaks = c(0, 
    0.5, 1)) + geom_hline(yintercept = 1) + geom_vline(xintercept = 1)

# stat_binhex()

6 Synergy determination

I consider using the “synergyfinder” package, published by He et al. at the FIMM. First I have to wrangle the data into a standardized input format. For this I write an ugly function.

prepare_synergy <- function(df, controls){
  #not a universal function
  syn_drug <- df %>%
    filter(!drug %in% controls & combination == TRUE) %>%
    #setup the easy stuff
    mutate(BlockID = paste("TAS-102", drug, sep = "_"),
           Response = rv*100,
           Replicate = ifelse(date == "171128", 1, 2),
           DrugRow = "TAS-102",
           DrugCol = drug,
           #build matrix
           Col = ifelse(drug != "DMSO", as.numeric(concentration), 6),
           Row = ifelse(TAS_concentration != "0", as.numeric(TAS_concentration), 6),
           #define concentrations
           ConcRow = ifelse(TAS_concentration != "0", 25/(5^(as.numeric(TAS_concentration)-1)), 0), #assuming the initila concentration is 50
           ConcCol = 10/(5^(as.numeric(concentration)-1)),
           ConcUnit = "uM"
           )
  
  syn_ctrl <- full_join(syn_drug %>% select(BlockID, line, TAS_concentration, Replicate) %>% distinct(),
                       #add TAS DMSO controls
                     df %>%
              filter(drug == "DMSO" & combination == TRUE) %>%
              mutate(Replicate = ifelse(date == "171128", 1, 2)) %>%
              group_by(line, TAS_concentration, Replicate) %>% 
              mutate(Response = median(rv, na.rm = TRUE)*100) %>%
              mutate(#define concentrations
                     ConcRow = ifelse(TAS_concentration != "0", 50/(5^(as.numeric(TAS_concentration)-1)), 0), #assuming the initila concentration is 50
                     ConcCol = 0,
                     Col = 6,
                     Row = ifelse(TAS_concentration != "0", as.numeric(TAS_concentration), 6),
                     DrugRow = "TAS-102",
                     ConcUnit = "uM"
                       )) %>%
                    #hacky way of formatting the data
                mutate(DrugCol = substr(BlockID, 9, nchar(BlockID)))
  
  syn_mat <- full_join(syn_drug, syn_ctrl) %>% dplyr::select(BlockID, Col, Row, Response, Replicate, DrugRow, DrugCol, ConcRow, ConcCol, ConcUnit, line) %>% distinct()
  
  return(syn_mat)
}

Now I apply this function.

coi <- c("DMSO", "Stauro_500nM")
syn_mat <- prepare_synergy(ctg_lnt, coi)
## Joining, by = c("line", "TAS_concentration", "Replicate")
## Joining, by = c("row_num", "col_num", "pcount", "id", "norm_fac", "pcount_norm", "screen", "date", "operator", "mithras", "experiment", "line", "TAS_concentration", "well", "drug", "row", "col", "Source_Position", "Source", "combination", "concentration", "ctrl", "rv", "rv_trim", "BlockID", "Response", "Replicate", "DrugRow", "DrugCol", "Col", "Row", "ConcRow", "ConcCol", "ConcUnit")
# I also determine the synergy for the trimmed dataset
syn_mat_t <- prepare_synergy(ctg_lnt %>% dplyr::select(-rv) %>% 
    rename(rv = rv_trim), coi)
## Joining, by = c("line", "TAS_concentration", "Replicate")
## Joining, by = c("row_num", "col_num", "pcount", "id", "norm_fac", "pcount_norm", "screen", "date", "operator", "mithras", "experiment", "line", "TAS_concentration", "well", "drug", "row", "col", "Source_Position", "Source", "combination", "concentration", "ctrl", "rv", "BlockID", "Response", "Replicate", "DrugRow", "DrugCol", "Col", "Row", "ConcRow", "ConcCol", "ConcUnit")

Now I define a wrapper function that can apply multiple synergy calculations and report the results in a tidy format. For now I stick to calulating the average synergy scores.

tidy_synergy = function(df, me){ lapply(as.list(me), function(m, f = df){
  #hacky way of ignoring cases in which the synergy scores could not be calculated
  return(tryCatch(f %>% 
      ReshapeData(., data.type = "viability") %>%
      #PlotDoseResponse() %>%
      CalculateSynergy(method = m, correction = TRUE) %>%
    tibble(BlockID = .$drug.pairs %>% simplify() %>% .[4],
           method = .$method,
           score_average = .$scores %>% .[[1]] %>% .[-1,-1] %>% simplify() %>% mean(),
           score_sd = .$scores %>% .[[1]] %>% .[-1,-1] %>% simplify() %>% sd()) %>%
    select(BlockID, method, score_average, score_sd) %>% distinct(), 
    error=function(e) NULL))}
) %>% bind_rows()
  #PlotSynergy(type = "all")
  }

I apply the function on the dataset.

tas_syn <- syn_mat %>% group_by(line, Replicate, BlockID) %>% 
    do(tidy_synergy(df = ., me = c("ZIP", "HSA", "Bliss", "Loewe"))) %>% 
    ungroup() %>% # complete the df for scores that could not be calculated
mutate(method = as.factor(method), line = as.factor(line)) %>% 
    complete(BlockID, method, Replicate, line) %>% mutate(score_average = ifelse(is.nan(score_average), 
    NA, score_average))

As an alternative I calculate synergy scores only for selected query concentrations. I start out by skipping the highest tested (TAS) concentration.

syn_mat_low <- syn_mat %>% filter(Row != 1) %>% mutate(Row = Row - 
    1)
# syn_mat_low <- syn_mat %>% filter(Row != 1 & Col != 1) %>%
# mutate(Row = Row -1, Col = Col -1)
tas_syn_l <- syn_mat_low %>% group_by(line, Replicate, BlockID) %>% 
    do(tidy_synergy(df = ., me = c("ZIP", "HSA", "Bliss", "Loewe"))) %>% 
    ungroup() %>% # complete the df for scores that could not be calculated
mutate(method = as.factor(method), line = as.factor(line)) %>% 
    complete(BlockID, method, Replicate, line) %>% mutate(score_average = ifelse(is.nan(score_average), 
    NA, score_average))

6.1 Comparing dropout synergy calculation

First I wonder how many models were fitted in both approaches.

df <- tibble(complete = tas_syn$score_average, wo_highest = tas_syn_l$score_average)
tibble(complete_na = df$complete %>% is.na() %>% sum(), wo_highest_na = df$wo_highest %>% 
    is.na() %>% sum(), frac_comp = complete_na/nrow(df), frac_wo_hi = wo_highest_na/nrow(df))

Now I am interested in the correltaion between synergy scores

tibble(complete = tas_syn$score_average, wo_highest = tas_syn_l$score_average) %>% 
    ggplot(aes(complete, wo_highest)) + geom_point() + theme_classic()
## Warning: Removed 301 rows containing missing values (geom_point).

tibble(complete = tas_syn$score_average, wo_highest = tas_syn_l$score_average) %>% 
    corrr::correlate()

For now I add the synergy data which was calculated without the highest concentration to the dataset.

tas_syn <- full_join(tas_syn, tas_syn_l %>% rename(score_average_l = score_average, 
    score_sd_l = score_sd))
## Joining, by = c("BlockID", "method", "Replicate", "line")

Now I want to inspect the distribution of synergy scores.

tas_syn %>% ggplot(aes(score_average)) + geom_histogram() + facet_grid(line ~ 
    method)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 167 rows containing non-finite values (stat_bin).

For now, I will stick to to ZIP method which calculates a delta-score. In recent publications this score is >15 for strong interactions and <-10 for antagonistic combinations. I observed that on multiple occasions not all statistics could be calculated. I want to see how often theses errors happen.

# tas_syn %>% group_by(BlockID, method) %>% summarise(n =
# sum(is.na(score_average))) %>% filter(n != 0) %>%
# ggplot(aes(n)) + geom_histogram() + facet_grid(~ method)
# define list
fb <- tas_syn %>% group_by(BlockID, method) %>% summarise(n = sum(is.na(score_average))) %>% 
    filter(n > 4) %>% .$BlockID

Errors during calculation happen in about 10% of all drug combinations, probably due to an inability to a model for the data. I define a list of combinations which will be excluded for now due to too sparse data.

tas_test <- lapply(as.list(tas_syn %>% filter(!BlockID %in% fb) %>% 
    .$BlockID %>% unique()), function(boi) tas_syn %>% filter(!BlockID %in% 
    fb) %>% mutate(test = if_else(BlockID == boi, TRUE, FALSE), 
    f_method = method) %>% group_by(f_method) %>% do(tidy(t.test(score_average ~ 
    test, data = .))) %>% mutate(BlockID = boi)) %>% bind_rows() %>% 
    group_by(f_method) %>% mutate(p.adj = p.value %>% p.adjust(method = "BH"))
tas_test %>% ggplot(aes(p.adj)) + geom_histogram() + facet_wrap(~f_method) + 
    theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

7 Synergy plotting

7.1 Synergy Dotplots

Now I am interested in compounds which are significantly different from the rest of all tested combinations.

tas_test %>% group_by(BlockID) %>% mutate(avg_p = mean(p.adj, 
    na.rm = TRUE)) %>% arrange(avg_p) %>% ungroup() %>% mutate(BlockID = factor(BlockID, 
    levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, p.adj)) + 
    geom_point() + theme_minimal() + scale_y_log10() + facet_wrap(~f_method) + 
    geom_hline(yintercept = 0.05) + geom_label_repel(data = tas_test %>% 
    filter(p.adj < 0.05), aes(label = BlockID)) + theme(axis.text.x = element_blank(), 
    axis.ticks.x = element_blank())

Next to one combination with strong synergism I also look for other potent combinations. In the following plot I show the synergy scores for all categories and in red the synergy score based on the drop-out data.

tas_syn %>% group_by(BlockID, method) %>% mutate(avg_s = mean(score_average, 
    na.rm = TRUE), avg_sl = mean(score_average_l, na.rm = TRUE)) %>% 
    group_by(BlockID) %>% mutate(avg_a = mean(score_average, 
    na.rm = TRUE)) %>% arrange(avg_a) %>% ungroup() %>% mutate(BlockID = factor(BlockID, 
    levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, avg_s)) + 
    geom_point(color = "black") + geom_point(aes(BlockID, avg_sl), 
    color = "red") + theme_classic() + geom_hline(yintercept = 0) + 
    # scale_y_log10() +
facet_wrap(~method) + # geom_hline(yintercept = 0.05) +
geom_label_repel(data = tas_syn %>% group_by(BlockID, method) %>% 
    summarise(avg_s = mean(score_average, na.rm = TRUE)) %>% 
    group_by(method) %>% do(arrange(., desc(avg_s)) %>% head(3)), 
    aes(label = BlockID), hjust = -0.1, vjust = -0.1) + ylab("Average Synergy Score") + 
    xlab("Drug Combinations") + theme(axis.text.x = element_blank(), 
    axis.ticks.x = element_blank())
## Warning: Ignoring unknown parameters: hjust, vjust

Now I invert the color scheme and arrange compounds according to the drop-out synergies. In blue I show the “complete” synergy scores.

tas_syn %>% group_by(BlockID, method) %>% mutate(avg_s = mean(score_average, 
    na.rm = TRUE), avg_sl = mean(score_average_l, na.rm = TRUE)) %>% 
    group_by(BlockID) %>% mutate(avg_al = mean(score_average_l, 
    na.rm = TRUE)) %>% arrange(avg_al) %>% ungroup() %>% mutate(BlockID = factor(BlockID, 
    levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, avg_sl)) + 
    geom_point(color = "black") + geom_point(aes(BlockID, avg_s), 
    color = "blue") + theme_classic() + geom_hline(yintercept = 0) + 
    # scale_y_log10() +
facet_wrap(~method) + # geom_hline(yintercept = 0.05) +
geom_label_repel(data = tas_syn %>% group_by(BlockID, method) %>% 
    summarise(avg_sl = mean(score_average_l, na.rm = TRUE)) %>% 
    group_by(method) %>% do(arrange(., desc(avg_sl)) %>% head(3)), 
    aes(label = BlockID), hjust = -0.1, vjust = -0.1) + ylab("Average Synergy Score") + 
    xlab("Drug Combinations") + theme(axis.text.x = element_blank(), 
    axis.ticks.x = element_blank())
## Warning: Ignoring unknown parameters: hjust, vjust

The Bliss score is an established measure of drug synergy. Therefore I plot it in more detail. First I plot only the complete averages across all lines and replicates.

tas_syn %>% filter(method == "Bliss") %>% group_by(BlockID, method) %>% 
    mutate(avg_s = mean(score_average, na.rm = TRUE)) %>% group_by(BlockID) %>% 
    mutate(avg_a = mean(score_average, na.rm = TRUE)) %>% arrange(avg_a) %>% 
    ungroup() %>% mutate(BlockID = factor(BlockID, levels = BlockID %>% 
    unique())) %>% ggplot(aes(BlockID, avg_s)) + geom_point() + 
    theme_classic() + geom_hline(yintercept = 0) + # scale_y_log10() + geom_hline(yintercept = 0.05) +
geom_label_repel(data = tas_syn %>% filter(method == "Bliss") %>% 
    group_by(BlockID, method) %>% summarise(avg_s = mean(score_average, 
    na.rm = TRUE)) %>% group_by(method) %>% do(arrange(., desc(avg_s)) %>% 
    head(5)), aes(label = BlockID), hjust = -0.1, vjust = -0.1) + 
    ylab("Bliss Synergy Score") + xlab("Drug Combinations") + 
    theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
## Warning: Ignoring unknown parameters: hjust, vjust

7.2 Synergy Boxplot

To see if there are line specific differences, I plot the data in a boxplot format. The first plot shows the complete synergy dataset.

tas_syn %>% filter(method == "ZIP") %>% group_by(BlockID, method) %>% 
    mutate(avg_s = mean(score_average, na.rm = TRUE), avg_sl = mean(score_average_l, 
        na.rm = TRUE)) %>% group_by(BlockID) %>% mutate(avg_a = mean(score_average, 
    na.rm = TRUE)) %>% arrange(avg_a) %>% ungroup() %>% mutate(BlockID = factor(BlockID, 
    levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, score_average)) + 
    geom_boxplot() + geom_point(aes(color = line)) + theme_classic() + 
    geom_hline(yintercept = 0) + # scale_y_log10() + geom_hline(yintercept = 0.05) +
ylab("ZIP Synergy Score") + xlab("Drug Combinations") + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1), axis.ticks.x = element_blank())
## Warning: Removed 39 rows containing non-finite values (stat_boxplot).
## Warning: Removed 39 rows containing missing values (geom_point).

Now I take a look at the “dropout” dataset in a similiar way.

tas_syn %>% filter(method == "ZIP") %>% group_by(BlockID, method) %>% 
    mutate(avg_s = mean(score_average, na.rm = TRUE), avg_sl = mean(score_average_l, 
        na.rm = TRUE)) %>% group_by(BlockID) %>% mutate(avg_al = mean(score_average_l, 
    na.rm = TRUE)) %>% arrange(avg_al) %>% ungroup() %>% mutate(BlockID = factor(BlockID, 
    levels = BlockID %>% unique())) %>% ggplot(aes(BlockID, score_average_l)) + 
    geom_boxplot() + geom_point(aes(color = line)) + theme_classic() + 
    geom_hline(yintercept = 0) + # scale_y_log10() + geom_hline(yintercept = 0.05) +
ylab("ZIP Synergy Score") + xlab("Drug Combinations") + theme(axis.text.x = element_text(angle = 90, 
    hjust = 1), axis.ticks.x = element_blank())
## Warning: Removed 44 rows containing non-finite values (stat_boxplot).
## Warning: Removed 44 rows containing missing values (geom_point).

7.3 Synergy Heatmaps

tas_syn %>% unite(exp, c(method, Replicate, line)) %>% dplyr::select(exp, 
    BlockID, score_average, BlockID) %>% as.data.frame() %>% 
    spread(exp, score_average) %>% remove_rownames() %>% column_to_rownames(., 
    var = "BlockID") %>% drop_na() %>% pheatmap()

8 Examples

Now I take a closer look at some drug combinations

boi = c("TAS-102_MK-8776/ SCH900776")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>% 
    filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average, 
    group = boi)) + geom_jitter(aes(color = line), width = 0.3) + 
    stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean", 
        colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE", 
    "FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi, 
    " method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).

syn_mat %>% filter(line == "07", BlockID == boi, Replicate == 
    1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP", 
    correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
boi = c("TAS-102_Birinapant")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>% 
    filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average, 
    group = boi)) + geom_jitter(aes(color = line), width = 0.3) + 
    stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean", 
        colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE", 
    "FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi, 
    " method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).

syn_mat %>% filter(line == "07", BlockID == boi, Replicate == 
    1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP", 
    correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
boi = c("TAS-102_AZD 5363")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>% 
    filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average, 
    group = boi)) + geom_jitter(aes(color = line), width = 0.3) + 
    stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean", 
        colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE", 
    "FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi, 
    " method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).

syn_mat %>% filter(line == "07", BlockID == boi, Replicate == 
    1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP", 
    correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)
boi = c("TAS-102_Bosutinib")
moi = "ZIP"
tas_syn %>% mutate(boi = if_else(BlockID %in% boi, TRUE, FALSE)) %>% 
    filter(method == moi) %>% ggplot(aes(as.factor(boi), score_average, 
    group = boi)) + geom_jitter(aes(color = line), width = 0.3) + 
    stat_summary(fun.y = "mean", fun.ymin = "mean", fun.ymax = "mean", 
        colour = "black", geom = "crossbar", width = 0.5) + geom_signif(comparisons = list(c("TRUE", 
    "FALSE")), method = "t.test") + theme_classic() + ggtitle(paste0(boi, 
    " method:", moi))
## Warning: Ignoring unknown parameters: method
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## Warning: Removed 39 rows containing non-finite values (stat_signif).
## Warning: Removed 39 rows containing missing values (geom_point).

syn_mat %>% filter(line == "07", BlockID == boi, Replicate == 
    1) %>% ReshapeData(., data.type = "viability") %>% CalculateSynergy(method = "ZIP", 
    correction = TRUE) %>% PlotSynergy(type = "all", save.file = FALSE)

9 To-Dos

Add library data and look for enrichments.

10 Conclusion